Gender bias in audience of seminars and career position

Data

Data description and summary in script 0_data_summary.

# Load the cleaned seminar-audience data. The .Rdata file is expected to
# provide an object named `data_a`, aliased here to `data` for the analysis.
load(file = "data_clean/data_audience.Rdata")
data <- data_a

Modeling

Negative binomial mixed-effects models with year as a random intercept.

# Candidate negative binomial GLMMs (family nbinom2) for audience size.
# Every model shares a random intercept for year; they differ only in which
# of gender, academic position and affirmative action enter, and how.

# Null model and single-predictor models
mg0 <- glmmTMB(audience_n ~ 1 + (1 | year), data = data, family = nbinom2)
mg1 <- glmmTMB(audience_n ~ gender + (1 | year), data = data, family = nbinom2)
mg2 <- glmmTMB(audience_n ~ position_cat + (1 | year),
               data = data, family = nbinom2)
mg3 <- glmmTMB(audience_n ~ affirm_action + (1 | year),
               data = data, family = nbinom2)

# Two predictors, additive
mg4 <- glmmTMB(audience_n ~ gender + position_cat + (1 | year),
               data = data, family = nbinom2)
mg5 <- glmmTMB(audience_n ~ gender + affirm_action + (1 | year),
               data = data, family = nbinom2)
mg6 <- glmmTMB(audience_n ~ affirm_action + position_cat + (1 | year),
               data = data, family = nbinom2)

# Two predictors with their interaction
mg7 <- glmmTMB(audience_n ~ gender * position_cat + (1 | year),
               data = data, family = nbinom2)
mg8 <- glmmTMB(audience_n ~ gender * affirm_action + (1 | year),
               data = data, family = nbinom2)
mg9 <- glmmTMB(audience_n ~ affirm_action * position_cat + (1 | year),
               data = data, family = nbinom2)

# Three predictors: additive, then with one two-way interaction
mg10 <- glmmTMB(audience_n ~ gender + position_cat + affirm_action + (1 | year),
                data = data, family = nbinom2)
mg11 <- glmmTMB(audience_n ~ gender * position_cat + affirm_action + (1 | year),
                data = data, family = nbinom2)
mg12 <- glmmTMB(audience_n ~ gender + position_cat * affirm_action + (1 | year),
                data = data, family = nbinom2)

# Full three-way interaction
mg13 <- glmmTMB(audience_n ~ gender * position_cat * affirm_action + (1 | year),
                data = data, family = nbinom2)

# Rank all candidates by AIC, reporting raw AIC values and Akaike weights.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
AICtab(mg0, mg1, mg2, mg3, mg4, mg5, mg6, mg7, mg8, mg9, mg10, mg11, mg12, mg13,
       base = TRUE, weights = TRUE) %>%
  kable(digits = 2)
AIC dAIC df weight
mg11 2160.03 0.00 9 0.45
mg10 2161.43 1.41 7 0.22
mg7 2162.27 2.24 8 0.15
mg4 2163.49 3.47 6 0.08
mg12 2163.98 3.95 9 0.06
mg13 2166.62 6.59 14 0.02
mg6 2167.07 7.04 6 0.01
mg2 2168.86 8.83 5 0.01
mg9 2170.27 10.25 8 0.00
mg5 2185.83 25.80 5 0.00
mg1 2186.24 26.22 4 0.00
mg8 2187.65 27.62 6 0.00
mg0 2201.16 41.14 3 0.00
mg3 2201.29 41.27 4 0.00

Residual diagnostic

# Residual diagnostics from simulated residuals for the two best-ranked
# models in the AIC table (mg11 and mg10).
mg11 %>% simulateResiduals() %>% plot()

mg10 %>% simulateResiduals() %>% plot()

Model results

The two equally plausible models for audience size (ΔAIC < 2) both included gender, academic position and affirmative actions as predictors; they differ only in that the best-fitting model also includes an interaction between gender and academic position.

# Fit statistics, year random-effect variance and fixed-effect estimates for
# mg11 (gender x position interaction plus affirmative action).
summary(mg11)
##  Family: nbinom2  ( log )
## Formula:          audience_n ~ gender * position_cat + affirm_action + (1 | year)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##   2160.0   2193.3  -1071.0   2142.0      289 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  year   (Intercept) 0.009051 0.09513 
## Number of obs: 298, groups:  year, 12
## 
## Dispersion parameter for nbinom2 family (): 7.05 
## 
## Conditional model:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    2.80057    0.06192   45.23   <2e-16 ***
## genderM                        0.11642    0.07417    1.57   0.1165    
## position_catpostdoc            0.12249    0.10663    1.15   0.2506    
## position_catprofessor          0.16594    0.10469    1.59   0.1130    
## affirm_actionafter             0.21633    0.09697    2.23   0.0257 *  
## genderM:position_catpostdoc   -0.12984    0.14357   -0.90   0.3658    
## genderM:position_catprofessor  0.22663    0.12674    1.79   0.0738 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional and marginal R2 for mg11.
performance::r2(mg11)
## # R2 for Mixed Models
## 
##   Conditional R2: 0.221
##      Marginal R2: 0.180
# Predictions from mg11 for every combination of position, gender and
# affirmative-action period. In the ggpredict() output the first term is
# column `x`, the second `group` and the third `facet`; `x` and `facet` are
# renamed by name (not by column position) so the prediction table matches
# the raw-data columns used for the x axis and the facets.
myg11 <- ggpredict(mg11, terms = c("position_cat", "gender", "affirm_action"))
prs <- as.data.frame(myg11) %>%
  rename(position_cat = x, affirm_action = facet)
#kable(prs , digits = 0)

# Raw observations (faded points) overlaid with the predicted values and
# their conf.low/conf.high ranges (outlined points), one panel per
# affirmative-action period.
ggplot(data, aes(x = position_cat, y = audience_n)) +
  geom_point(aes(col = gender), position = position_dodge(0.6), alpha = 0.3,
             size = 3, show.legend = FALSE) +
  facet_grid(~affirm_action,
             labeller = as_labeller(c("before" = "Before affirmative actions",
                                      "after" = "After affirmative actions"))) +
  #scale_color_manual(values = c("#b2abd2", "#fdb863")) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(data = prs,
                  aes(x = position_cat, y = predicted, fill = group,
                      ymax = conf.high, ymin = conf.low),
                  alpha = 1, position = position_dodge(0.6), size = 1,
                  shape = 21, col = "black") +
  xlab("Academic position") + ylab("Audience (N)")

# No plot is passed to ggsave(), so it saves the most recent ggplot.
ggsave("figures/audience_speakers.jpeg", width = 8, height = 4)
# mg11 predictions (from `prs`) drawn over the raw observations, with
# affirmative-action period on the x axis and one panel per academic position.
ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(aes(col = gender), position = position_dodge(0.6), alpha = 0.3,
             size = 3, show.legend = FALSE) +
  facet_grid(~position_cat,
             labeller = as_labeller(c(student = "Student",
                                      postdoc = "Post-doc",
                                      professor = "Professor"))) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_x_discrete(labels = c("Before", "After")) +
  #scale_y_log10()+
  # Legend title was a mis-encoded "Gênero"; use the English, ASCII-only
  # "Gender" like the other figures in this script.
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(data = prs,
                  aes(x = affirm_action, y = predicted, fill = group,
                      ymax = conf.high, ymin = conf.low),
                  alpha = 1, position = position_dodge(0.6), size = 1.2,
                  shape = 21, col = "black") +
  xlab("Affirmative actions") + ylab("Audience (N)")

# No plot is passed to ggsave(), so it saves the most recent ggplot.
ggsave("figures/audience_speakers_b.jpeg", width = 8, height = 4)
# Fit statistics, year random-effect variance and fixed-effect estimates for
# mg10 (additive model: gender + position + affirmative action).
summary(mg10)
##  Family: nbinom2  ( log )
## Formula:          audience_n ~ gender + position_cat + affirm_action + (1 | year)
## Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##   2161.4   2187.3  -1073.7   2147.4      291 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance Std.Dev.
##  year   (Intercept) 0.008642 0.09296 
## Number of obs: 298, groups:  year, 12
## 
## Dispersion parameter for nbinom2 family (): 6.87 
## 
## Conditional model:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            2.78546    0.05768   48.29  < 2e-16 ***
## genderM                0.15395    0.05522    2.79   0.0053 ** 
## position_catpostdoc    0.04292    0.07173    0.60   0.5496    
## position_catprofessor  0.33054    0.06264    5.28 1.32e-07 ***
## affirm_actionafter     0.20807    0.09577    2.17   0.0298 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Conditional and marginal R2 for mg10.
performance::r2(mg10)
## # R2 for Mixed Models
## 
##   Conditional R2: 0.207
##      Marginal R2: 0.169
# Predictions from the additive model mg10 for every combination of position,
# gender and affirmative-action period. ggpredict() columns `x` and `facet`
# are renamed by name so they match the raw-data columns used in the plot.
myg10 <- ggpredict(mg10, terms = c("position_cat", "gender", "affirm_action"))
pr10 <- as.data.frame(myg10) %>%
  rename(position_cat = x, affirm_action = facet)

# mg10 predictions drawn over the raw observations, with affirmative-action
# period on the x axis and one panel per academic position.
ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(aes(col = gender), position = position_dodge(0.6), alpha = 0.3,
             size = 3, show.legend = FALSE) +
  facet_grid(~position_cat,
             labeller = as_labeller(c(student = "Student",
                                      postdoc = "Post-doc",
                                      professor = "Professor"))) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_x_discrete(labels = c("Before", "After")) +
  # Legend title was a mis-encoded "Gênero"; use the English, ASCII-only
  # "Gender" like the other figures in this script.
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(data = pr10,
                  aes(x = affirm_action, y = predicted, fill = group,
                      ymax = conf.high, ymin = conf.low),
                  alpha = 1, position = position_dodge(0.6), size = 1.2,
                  shape = 21, col = "black") +
  xlab("Affirmative actions") + ylab("Audience (N)")

# Saved under its own file name: the mg11 version of this figure is written to
# figures/audience_speakers_b.jpeg earlier in the script, and reusing that path
# here silently replaced it with the mg10 figure.
ggsave("figures/audience_speakers_b_mg10.jpeg", width = 8, height = 4)

Only professors - productivity metrics

Investigating whether differences in productivity between male and female professors and researchers are related to audience size.

We measured productivity publication metrics from Google Scholar for professors and researchers.

Creating a productivity index from the first PCA axis of these metrics.

PCA productivity metrics

# Keep only talks for which both productivity metrics are available.
# Columns are referenced bare (data masking) rather than as `data$...`, so the
# conditions are evaluated on the rows actually flowing through the pipe and
# not on whatever the global `data` object happens to contain.
dp <- data %>%
  filter(!is.na(total_citation_n), !is.na(nature_index_count))

# Number of talks by speaker gender and affirmative-action period
table(dp$gender, dp$affirm_action)
##    
##     before after
##   F     14     6
##   M     58     9

Productivity publication metrics

# PCA on the productivity metrics, without FactoMineR's automatic plots.
# NOTE(review): the metrics are selected by column position (22 to 29);
# presumably these are the eight productivity/institution metrics. Selecting
# them by name would survive changes to the column order.
pca1 <- PCA(dp[, c(22:29)], graph = FALSE)

# Biplot of talks (points coloured by speaker gender, with ellipses per
# gender) and metric loadings; dashed lines mark the origin of both axes.
# Legend titles were a mis-encoded "Gênero"; use the English, ASCII-only
# "Gender" like the other figures in this script.
# NOTE(review): the percentages in the axis labels are hard-coded and must be
# updated by hand if the PCA input changes.
p1 <- fviz_pca_biplot(pca1, col.ind = dp$gender, addEllipses = TRUE,
                      col.ind.sub = "none", geom = "point",
                      repel = TRUE) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_shape(name = "Gender") +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  ggtitle("Productivity metrics") +
  xlab("PC1 (52%)") + ylab("PC2 (21%)") +
  theme_cowplot()

p1

ggsave("figures/pca_biplot.jpeg", width = 6, height = 6)

For the analysis restricted to professors' talks (N = 87), the PCA shows that all productivity metrics were highly correlated with the first axis (52% of variance explained; Figure 2B), while the institution indices made up the second axis (21% of variance explained).

Extracting the first two PCA axes

# Attach each talk's coordinates on the first two PCA axes to `dp`.
dp[["pc1"]] <- pca1$ind$coord[, 1]
dp[["pc2"]] <- pca1$ind$coord[, 2]

Modeling

# Candidate negative binomial GLMMs (family nbinom2) for the subset `dp`.
# All models include affirmative action and a random intercept for year;
# they differ in whether gender and the productivity index (pc1) enter.
m0 <- glmmTMB(audience_n ~ 1 + affirm_action + (1 | year),
              data = dp, family = nbinom2)
m1 <- glmmTMB(audience_n ~ gender + affirm_action + (1 | year),
              data = dp, family = nbinom2)
# The original formula read `pc1 + + affirm_action`; the doubled `+` was a
# typo (parsed as a unary plus) and is removed. The fitted model is the same.
m2 <- glmmTMB(audience_n ~ pc1 + affirm_action + (1 | year),
              data = dp, family = nbinom2)
m3 <- glmmTMB(audience_n ~ gender + pc1 + affirm_action + (1 | year),
              data = dp, family = nbinom2)
m4 <- glmmTMB(audience_n ~ gender * pc1 + affirm_action + (1 | year),
              data = dp, family = nbinom2)

# Rank the candidates by AIC, reporting raw AIC values and Akaike weights.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
AICtab(m0, m1, m2, m3, m4,
       base = TRUE, weights = TRUE) %>%
  kable(digits = 2)
AIC dAIC df weight
m3 691.32 0.00 6 0.60
m4 692.95 1.64 7 0.27
m2 695.04 3.73 5 0.09
m1 696.94 5.62 5 0.04
m0 702.13 10.82 4 0.00

Residual diagnostic

Best model

# Residual diagnostics from simulated residuals for the two best-ranked
# professor models (m3 and m4).
m3 %>% simulateResiduals() %>% plot()

m4 %>% simulateResiduals() %>% plot()

Model results

# Fit statistics, year random-effect variance and fixed-effect estimates for
# m3 (gender + productivity index + affirmative action).
summary(m3)
##  Family: nbinom2  ( log )
## Formula:          audience_n ~ gender + pc1 + affirm_action + (1 | year)
## Data: dp
## 
##      AIC      BIC   logLik deviance df.resid 
##    691.3    706.1   -339.7    679.3       81 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev. 
##  year   (Intercept) 1.622e-09 4.027e-05
## Number of obs: 87, groups:  year, 11
## 
## Dispersion parameter for nbinom2 family (): 5.52 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         3.03070    0.11415  26.550   <2e-16 ***
## genderM             0.30566    0.12366   2.472   0.0134 *  
## pc1                 0.07246    0.02626   2.759   0.0058 ** 
## affirm_actionafter  0.27050    0.13267   2.039   0.0415 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R2 for m3. The random-effect variance is reported as unavailable for this
# fit, so only the marginal R2 is returned (conditional R2 is NA).
performance::r2(m3)
## Random effect variances not available. Returned R2 does not account for random effects.
## # R2 for Mixed Models
## 
##   Conditional R2: NA
##      Marginal R2: 0.206

We used the first PCA axis as a predictor, together with gender, to explain professors' audience and found that, as expected, audience increases with the productivity index (first PCA axis), but female professors still had, on average, an audience about 1.4 times smaller than that of male professors.

# m3 predictions along the productivity index (pc1), one curve per gender.
my3 <- as.data.frame(ggpredict(m3, terms = c("pc1", "gender")))

# Predicted curves with conf.low/conf.high ribbons, plus the observed talks
# from `dp` as points.
ggplot(my3, aes(x = x, y = predicted, colour = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.3, colour = NA) +
  geom_line() +
  geom_point(data = dp, aes(x = pc1, y = audience_n, colour = gender),
             alpha = 0.6) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() +
  ggtitle("") +
  labs(x = "Productivity index (PC1 axis)", y = "Audience (N)")

ggsave("figures/audience_professor.jpeg", width = 9, height = 6)

# m3 predictions by gender only, drawn with the default ggeffects plot.
my3 <- ggpredict(m3, terms = "gender")
plot(my3)

# m4 (gender x pc1 interaction) predictions along the productivity index,
# one curve per gender.
my4 <- as.data.frame(ggpredict(m4, terms = c("pc1", "gender")))

# Predicted curves with conf.low/conf.high ribbons, plus the observed talks
# from `dp` as points.
ggplot(my4, aes(x = x, y = predicted, colour = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.3, colour = NA) +
  geom_line() +
  geom_point(data = dp, aes(x = pc1, y = audience_n, colour = gender),
             alpha = 0.6) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() +
  ggtitle("") +
  labs(x = "Productivity index (PC1 axis)", y = "Audience (N)")

#ggsave("figures/audience_professor.jpeg", width=9, height = 6)  

Figure audience

# Composite figure: panel A on top (all talks, mg11 predictions), panels B
# (PCA biplot) and C (m3 predictions) side by side underneath.

# ---- Panel A: audience by affirmative-action period, position and gender ----
# mg11 predictions with ggpredict()'s `x` and `facet` columns renamed to the
# raw-data column names used on the x axis and in the facets.
prs <- as.data.frame(myg11) %>%
  rename(affirm_action = facet, position_cat = x)
f1 <- ggplot(data, aes(x = affirm_action, y = audience_n)) +
  geom_point(aes(col = gender), position = position_dodge(0.6), alpha = 0.3,
             size = 3, show.legend = FALSE) +
  facet_grid(~position_cat) +
  #scale_color_manual(values = c("#b2abd2", "#fdb863")) +
  scale_color_manual(values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  geom_pointrange(data = prs,
                  aes(x = affirm_action, y = predicted, fill = group,
                      ymax = conf.high, ymin = conf.low),
                  alpha = 1, position = position_dodge(0.6), size = 1,
                  shape = 21, col = "black") +
  ylab("Audience (N)") +
  xlab("Affirmative actions") +
  labs(tag = "A")

# ---- Panel C: audience vs. productivity index, from m3 ----
my3 <- ggpredict(m3, terms = c("pc1", "gender"))
my3 <- as.data.frame(my3)
#my3$prof <- "Professors only"
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# lines; left unchanged so the script keeps working on older versions.
f2 <- ggplot(my3, aes(x = x, y = predicted, col = group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.3, colour = NA) +
  geom_line(size = 1.5) +
  #facet_grid(~prof)+
  scale_color_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  scale_fill_manual(name = "Gender", values = c("#6D57CF", "#FCA532")) +
  theme_cowplot() + ggtitle("") +
  ylab("Audience (N)") + xlab("Productivity index (PC1 axis)") +
  geom_point(data = dp, aes(x = pc1, y = audience_n, col = gender),
             alpha = 0.6, size = 2) +
  # theme() must follow theme_cowplot(), which would otherwise reset it.
  theme(legend.position = "none",
        plot.title = element_text(size = 12, vjust = 2, hjust = 0.5)) +
  labs(tag = "C", title = "Professors' audience")

# ---- Panel B: PCA biplot of the productivity metrics ----
# Rebuilds (and overwrites) `p1` with panel styling and no legend.
p1 <- fviz_pca_biplot(pca1, col.ind = dp$gender, addEllipses = TRUE,
                      col.ind.sub = "none", geom = "point",
                      repel = TRUE) +
  facet_grid(. ~ .) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_color_manual(name = "gender", values = c("#6D57CF", "#FCA532")) +
  scale_shape(name = "gender") +
  scale_fill_manual(name = "gender", values = c("#6D57CF", "#FCA532")) +
  labs(title = "Professors' productivity", tag = "B") +
  xlab("PC1 (52%)") + ylab("PC2 (21%)") +
  theme_cowplot() +
  theme(legend.position = "none",
        plot.title = element_text(size = 12, vjust = 2, hjust = 0.5)) #+
  #coord_cartesian(clip = "off")+
  # scale_x_continuous(limits=c(-6,8), expand=c(0,0))+
  # scale_y_continuous(limits=c(-4,8), breaks=c(-4,-2,0,2,4,6))
  # annotate("rect", xmin=-6, xmax=8, ymin=7,ymax=8, fill="gray85")+
  #annotate("text",label="Professors'productivity", x=0, y=7.5, fill="gray85")



# Stack A over the B|C row, collect the legends and set the row heights.
f1 / (p1 + f2) + plot_layout(guides = "collect", heights = c(0.9, 1))

ggsave("figures/FIG_audience.jpeg", width = 10, height = 9)